Part of the possible emulator accuracy issues could be satellite fraction issues. Gonna look at those explicitly.
In [161]:
from pearce.emulator import SpicyBuffalo, LemonPepperWet, OriginalRecipe
from pearce.mocks import cat_dict
import numpy as np
from os import path
In [162]:
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
In [163]:
#xi gg
training_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_lowmsat/PearceRedMagicXiCosmoFixedNd.hdf5'
#test_file= '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/'
test_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/PearceRedMagicXiCosmoFixedNd_Test.hdf5'
In [164]:
em_method = 'gp'
split_method = 'random'
In [165]:
a = 1.0
z = 1.0/a - 1.0
In [166]:
scale_bin_centers = np.array([ 0.09581734, 0.13534558, 0.19118072, 0.27004994,
0.38145568, 0.53882047, 0.76110414, 1.07508818,
1.51860241, 2.14508292, 3.03001016, 4.28000311,
6.04566509, 8.53972892, 12.06268772, 17.0389993 ,
24.06822623, 33.99727318])
In [167]:
bin_idx = 1
fixed_params = {'z':z, 'r': scale_bin_centers[bin_idx]}#, 'cosmo': 0}#, 'r':24.06822623}
In [168]:
np.random.seed(0)
emu = OriginalRecipe(training_file, method = em_method, fixed_params=fixed_params,
custom_mean_function = 'linear', downsample_factor = 0.1)
In [169]:
emu.scale_bin_centers
Out[169]:
In [170]:
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)
In [171]:
test_x, test_y, test_cov, _ = emu.get_data(test_file, emu.fixed_params)
t, old_idxs = emu._whiten(test_x)
In [172]:
resmat_flat = 10**pred_y - 10**data_y
datamat_flat = 10**data_y
In [173]:
t_bin = t
acc_bin = np.abs(resmat_flat)/datamat_flat
In [174]:
from pearce.mocks.kittens import TrainingBox
In [175]:
boxno = 0
cat = TrainingBox(boxno, system = 'sherlock')
In [176]:
cat.load(a, HOD='zheng07')
In [ ]:
nd = 1e-4
In [ ]:
hod_pnames = emu.get_param_names()[7:]
mf = cat.calc_mf()
In [ ]:
for pname in hod_pnames:
print pname, emu.get_param_bounds(pname)
In [ ]:
from scipy.optimize import minimize_scalar
def add_logMmin(hod_params, cat):
"""
In the fixed number density case, find the logMmin value that will match the nd given hod_params
:param: hod_params:
The other parameters besides logMmin
:param cat:
the catalog in question
:return:
None. hod_params will have logMmin added to it.
"""
hod_params['logMmin'] = 13.0 #initial guess
#cat.populate(hod_params) #may be overkill, but will ensure params are written everywhere
def func(logMmin, hod_params):
hod_params.update({'logMmin':logMmin})
return (cat.calc_analytic_nd(hod_params) - nd)**2
res = minimize_scalar(func, bounds = (12.0, 16.0), args = (hod_params,), options = {'maxiter':100},\
method = 'Bounded')
# assuming this doens't fail
#print 'logMmin', res.x
hod_params['logMmin'] = res.x
#print hod_params
In [ ]:
sat_fracs = np.zeros((1000,))
sat_nd = np.zeros((1000,))
actual_nd = np.zeros_like(sat_fracs)
log_mMins = np.zeros_like(sat_fracs)
for idx, x in enumerate(test_x[:1000, 7:]):
hod_params = dict(zip(hod_pnames, x))
add_logMmin(hod_params, cat)
log_mMins[idx] = hod_params['logMmin']
sat_hod = cat.calc_hod(hod_params, component='satellite')
sat_nd[idx] = np.sum(mf*sat_hod)/((cat.Lbox/cat.h)**3)
#sat_fracs[idx] = sat_nd/nd
actual_nd[idx] = cat.calc_analytic_nd(hod_params)
sat_fracs = sat_nd/actual_nd
In [ ]:
plt.hist(sat_fracs)
In [ ]:
sat_fracs.mean()
In [ ]:
sat_fracs.std()
In [ ]:
plt.hist(log_mMins)
In [ ]:
hod_pnames
In [ ]:
plt.scatter(test_x[:1000, 9], acc_bin[:1000])
In [ ]:
test_x[:5000,0]
In [ ]:
pnames = emu.get_param_names()
for i in xrange(7):
for j in xrange(7):
mean_acc = np.mean(acc_bin[j*5000:(j+1)*5000])
plt.scatter(test_x[j*5000, i], mean_acc, label = 'Cosmo %d'%j)
plt.xlabel(pnames[i])
plt.ylabel('Avg. Percent Accurate')
plt.title('r = %.2f'%scale_bin_centers[bin_idx])
plt.legend(loc = 'best')
plt.show()
In [ ]:
test_x[0*35::1000, 9]
In [ ]:
pnames = emu.get_param_names()
for i in xrange(7,11):
for j in xrange(0,1000):
mean_acc = np.mean(acc_bin[j::1000])
plt.scatter(test_x[j, i], mean_acc, label = 'HOD %d'%j, alpha = 0.6)
plt.xlabel(pnames[i])
plt.ylabel('Avg. Percent Accurate')
plt.title('r = %.2f'%scale_bin_centers[bin_idx])
#plt.legend(loc = 'best')
plt.show()
In [ ]:
mcut = 13.5
sub_test_idx = np.logical_and(test_x[:, 9]>mcut, test_x[:, 7] < mcut)
print np.mean(acc_bin[sub_test_idx]), np.sum(sub_test_idx)
In [ ]:
plt.scatter(test_x[:1000, 9], sat_fracs)
plt.xlabel('logM1')
plt.ylabel('Sat Frac')
In [ ]:
plt.scatter(test_x[:1000, 9], log_mMins)
plt.xlabel('logM1')
plt.ylabel('logMmin')
In [ ]:
plt.hist(1e4*(actual_nd-nd) )
In [ ]:
plt.scatter(test_x[:1000, 9], 1e4*(actual_nd-nd) )
plt.xlabel('logM1')
plt.ylabel('Actual nd - Fixed nd')
In [ ]:
good_nd_idxs = np.isclose(actual_nd, nd)
print np.sum(good_nd_idxs)/1000.
In [ ]:
# NOTE sat_fracs uses actual_nd, so these is a weird selection
good_satfrac_idxs = np.logical_and(0.1 < sat_fracs, sat_fracs < 0.5)
print np.sum(good_satfrac_idxs)/1000.
In [ ]:
print np.sum(np.logical_and(good_satfrac_idxs, good_nd_idxs))/1000.
In [ ]: